home *** CD-ROM | disk | FTP | other *** search
Text File | 1989-06-03 | 54.5 KB | 1,841 lines |
- Newsgroups: comp.sources.misc
- From: allbery@uunet.UU.NET (Brandon S. Allbery - comp.sources.misc)
- Subject: v07i012: Source of strings, a replacement for the BSD command of the same name.
- Reply-To: greim@sbsvax.informatik.uni-saarland.dbp.de (Michael Greim)
-
- Posting-number: Volume 7, Issue 12
- Submitted-by: greim@sbsvax.informatik.uni-saarland.dbp.de (Michael Greim)
- Archive-name: nstrings.bsd
-
- [Anyone want to volunteer to convert this for COFF? ++bsa]
-
- Here is strings, a replacement for the BSD program of the same name,
- for distribution in comp.sources.misc.
-
- I wrote this because I became annoyed by
- - old strings is slow
- - old strings is not correct. It thinks ^L and sometimes even 0x80 to be
- printable. (See README for more details)
- [Amen to that! ++bsa]
- - old strings has trouble with the segment of initialized data on some
- systems
- - on some systems there is strings, on some there isn't
-
- It is not only for UNIX systems, but I have only tried it on UNIX and
- VMS so far.
-
- Absorb, apply and enjoy,
- -mg
-
- --
- Michael Greim Email : greim@sbsvax.informatik.uni-saarland.dbp.de
- or : ...!uunet!unido!sbsvax!greim
-
- #! /bin/sh
- # This is a shell archive. Remove anything before this line, then unpack
- # it by saving it into a file and typing "sh file". To overwrite existing
- # files, type "sh file -c". You can also feed this as standard input via
- # unshar, or by typing "sh <file", e.g.. If this archive is complete, you
- # will see the following message at the end:
- # "End of archive 1 (of 1)."
- # Contents: COPYING MANIFEST Makefile README config.h limits.c
- # output.c strings.1 strings.c strings.h strings.txt test_input
- # tune.h
- # Wrapped by greim@sbsvax on Tue May 23 18:13:15 1989
- PATH=/bin:/usr/bin:/usr/ucb ; export PATH
- if test -f 'COPYING' -a "${1}" != "-c" ; then
- echo shar: Will not clobber existing file \"'COPYING'\"
- else
- echo shar: Extracting \"'COPYING'\" \(1268 characters\)
- sed "s/^X//" >'COPYING' <<'END_OF_FILE'
- X
- X strings
- X
- X (C) Copyright by Michael Greim, 5/23/89
- X
- X-------------------------------------------------------------
- X
- XThe author, Michael Greim, allows anyone to do anything he/she/it wants with
- X"strings", provided the following conditions are met:
- X
- X- the program may not be sold without the authors written permission
- X- this notice stays in place
- X- any changes to the original are marked as such
- X
- X
- XThe author explicitly asks anybody :
- X
- X- to port this program to as much computer systems as possible and send
- X him complaints, results, bug fixes, praise ...
- X- to incorporate this program in any package of Public Domain programs for
- X free distribution (GNU would be welcome)
- X
- X
- XThere is no warranty of merchantability nor any warranty of fitness for a
- Xparticular purpose nor any other warranty, either express or implied, as
- Xto the accuracy of the herewith published program, or as to its
- Xsuitability for any particular purpose.
- XAccordingly, I, Michael Greim, assume no responsibility
- Xfor its use by the recipient. Further, I, Michael Greim, assume no
- Xobligation to furnish any assistance of any kind whatsoever,
- Xor to furnish any additional information or documentation.
- X
- X(But if you ask me nice, I might consider to think about starting to
- X speculate about it ... :-)
- END_OF_FILE
- if test 1268 -ne `wc -c <'COPYING'`; then
- echo shar: \"'COPYING'\" unpacked with wrong size!
- fi
- # end of 'COPYING'
- fi
- if test -f 'MANIFEST' -a "${1}" != "-c" ; then
- echo shar: Will not clobber existing file \"'MANIFEST'\"
- else
- echo shar: Extracting \"'MANIFEST'\" \(830 characters\)
- sed "s/^X//" >'MANIFEST' <<'END_OF_FILE'
- X File Name Archive # Description
- X-----------------------------------------------------------
- X COPYING 1 Copyright Notice.
- X MANIFEST 1 This shipping list
- X Makefile 1
- X README 1 What is strings ? How to port it.
- X config.h 1 Default parameters for different machines.
- X limits.c 1 Get limits of initialized data segment.
- X output.c 1 Output routines.
- X strings.1 1 Manual entry.
- X strings.c 1 Main part of program.
- X strings.h 1
- X strings.txt 1 Manual entry as plain text file.
- X test_input 1 sample (btoa'd) file to demonstrate errors of old "strings".
- X tune.h 1 Parameter settings for current machine.
- END_OF_FILE
- if test 830 -ne `wc -c <'MANIFEST'`; then
- echo shar: \"'MANIFEST'\" unpacked with wrong size!
- fi
- # end of 'MANIFEST'
- fi
- if test -f 'Makefile' -a "${1}" != "-c" ; then
- echo shar: Will not clobber existing file \"'Makefile'\"
- else
- echo shar: Extracting \"'Makefile'\" \(907 characters\)
- sed "s/^X//" >'Makefile' <<'END_OF_FILE'
- X#
- X# Makefile for 'strings'
- X#
- X# sccsid = @(#) REL.Mfile (v1.3 5/22/89)
- X#
- XSHELL=/bin/sh
- X
- X# If you want to play it safe, define SAFETY_FIRST. The program will most
- X# probably work and you won't have to spend time on porting it.
- X# If you have defined things yourself and you want the program
- X# to use these definitions, you must define USE_USER_DEFINES.
- X# If you don't define SAFETY_FIRST and USE_USER_DEFINES, the program
- X# will try to identify your system and then use its defaults.
- XUFLAGS=
- X#UFLAGS=-DUSE_USER_DEFINES
- X#UFLAGS=-DSAFETY_FIRST
- X
- X# Define DEBUG to get a lot of debug output on "prot".
- XDEBUGFLAGS=
- X
- XCFLAGS= -O ${DEBUGFLAGS} ${UFLAGS}
- X
- XHEADERS= strings.h config.h tune.h
- XOBJS= strings.o limits.o output.o
- X
- Xall: strings
- X
- Xstrings: ${OBJS}
- X cc -o strings ${OBJS}
- X
- X${OBJS}: ${HEADERS}
- X
- X#strings.o: strings.h config.h tune.h
- X#limits.o : strings.h config.h tune.h
- X#output.o : strings.h config.h tune.h
- END_OF_FILE
- if test 907 -ne `wc -c <'Makefile'`; then
- echo shar: \"'Makefile'\" unpacked with wrong size!
- fi
- # end of 'Makefile'
- fi
- if test -f 'README' -a "${1}" != "-c" ; then
- echo shar: Will not clobber existing file \"'README'\"
- else
- echo shar: Extracting \"'README'\" \(7601 characters\)
- sed "s/^X//" >'README' <<'END_OF_FILE'
- X1.) What do we have here?
- X
- XThis is the README file for 'strings'. 'strings' is a rewrite
- Xand replacement for the 4BSD program of the same name.
- X'strings' looks for sequences of printable characters in a file and
- Xoutputs them.
- X
- XCurrent version is 1.6.9.
- X
- X(To get best results when reading these files, use an option in your
- X favourite editor to expand a TAB to 4 SPACEs. E.g. in vi it is
- X "set tabstop=4")
- X
- XYou should have the files:
- X README - this file
- X COPYING - Copyright Notice
- X Makefile - Makefile
- X strings.h
- X config.h
- X tune.h
- X strings.c - the main source
- X limits.c - the UNIX (trademark of AT&T) specific stuff to identify
- X an initialized data segment
- X output.c - output routines
- X test_input - a file containing the 2 characters the original strings
- X stumbled over. Unpack it with atob.
- X strings.1 - manual page.
- X strings.txt - manual page without nroff sequences
- X
- X2.) How to build strings.
- X
- XNow that you have strings you will want to build it.
- XThe program is shipped with UFLAGS undefined (see below for an explanation).
- XOn UNIX (trademark of AT&T) systems, you should be able to build the
- Xprogram by just typing "make". On non-UNIX systems you might have problems.
- X
- Xa.) edit Makefile. If you want to play it safe, set UFLAGS=-DSAFETY_FIRST.
- X strings should now compile without any problems.
- X You won't get the UNIX specific stuff: the program does not
- X try to identify the initialized data segment.
- X
- Xb.) If you don't want to play it safe, but rather you want to configure
- X strings to your system, take a look at config.h first. There is a
- X list of systems. If one of these is yours, edit Makefile and
- X set UFLAGS to nothing. When compiling the defines for the system
- X are used.
- X WARNING: some things may differ between different versions of the
- X same system. On some machines there is no easy way to distinguish
- X between such versions.
- X If you were wrong, and the system you are using is not in the
- X list of known symbols, the minimal defaults, like in a.) will be
- X used.
- X
- Xc.) You want to configure strings, and your system is not in the list
- X of known systems.
- X Edit Makefile and set UFLAGS=-DUSE_USER_DEFINES.
- X Edit tune.h and set things up for your system. The variables are
- X commented.
- X
- XThere are 3 header files. The inclusion works like this.
- X(you can skip this)
- X
- X (reading strings.h)
- X |
- X v
- X (including config.h)
- X |
- X v
- X <----- is SAFETY_FIRST defined?
- X | |
- X | | -
- X | v +
- X | is USE_USER_DEFINES defined? ---->use stuff from tune.h ---> continue
- X | |
- X | | -
- X | v
- X | is this machine 1 ?
- X | |
- X | | -
- X | v
- X | is this machine 2 ?
- X | |
- X | | -
- X | v
- X | ...
- X | |
- X | | -
- X v v
- X use safe defaults
- X |
- X v
- X continue
- X
- XThe program, or rather the headerfiles know about the following machines:
- X
- X- VAX 11/780 (4.3 BSD) by "unix" and "vax" and not "ultrix"
- X- SIEMENS PC-MX2 (SINIX v2.1) by "nsc32000" and "sinix" and "ns16000"
- X- Sun 3/260 (SunOS 3.5) by "unix" and "sun" and "mc68020"
- X- VAX 6800 (Ultrix 2.1) by "unix" and "ultrix" and "bsd4_2"
- X- uVAX (VMS 5.1) by "vms" and "vax"
- X
- X3.) Why is this strings better than the standard one?
- X
- X a.) This version of strings is at least 4 times faster than the original
- X one. If the minimal string length is set to something else, it might
- X even be 10 times faster.
- X b.) The original one had several bugs.
- X c.) This one is public domain. You get source.
- X
- X ad a.)
- X Here are results of some tests:
- X
- X machine: PC-MX2, OS: SINIX v2.1 file: /vmsinix (289084)
- X old : u 43.6 s 1.1 = 44.7
- X new : u 3.8 s 2.3 = 6.1
- X
- X machine: VAX 11/780 OS: 4.3BSD file: /vmunix (329728)
- X old : u 18.0 s 2.7 = 20.7
- X new : u 1.5 s 0.9 = 2.4
- X
- X machine: SUN 3/260 OS: SunOS 3.5 file: /vmunix (558359)
- X old : u 6.5 s 0.6 = 7.1
- X new : u 1.6 s 0.2 = 1.8
- X
- X machine: VAX 6800 OS: Ultrix 2.1 file: /vmunix (662528)
- X old : u 5.2 s 0.4 = 5.6
- X new : u 0.6 s 0.0 = 0.6
- X
- X User, sys and total times in seconds.
- X
- X ad b.)
- X The original strings
- X - thinks control-L (0x0c) is a printable character
- X - under some circumstances thinks 0x80 is printable. In the
- X package there is a file, test_input. Unpack this file with
- X atob. The file now contains several lines of characters including
- X a line with control-L and one with a 0x80. The original strings
- X errs for both cases.
- X - did not get the start address of the initialized data right on
- X some systems.
- X - had problems when dealing with the standard input.
- X The first two bugs have been found on 4.3BSD, SunOS and ULTRIX, the
- X third only on MX2 SINIX v2.1.
- X
- X4.) What about bugs?
- X
- X If you find bugs, tell me. If you fixed them or if you made an
- X extension which really is one, drop me a note.
- X
- X5.) Notes
- X
- X This program is about 7 times faster than the original one.
- X There are two reasons for this:
- X - It does not use fgetc/fputc to get or put characters, but
- X reads characters in blocks. It does not copy them but rather moves
- X pointers around on the input buffer. There is no
- X procedure call needed to get at each character.
- X When a sequence is found, it is put into the output buffer in one
- X block, thus there is no need, like in fputc, to check for possible
- X overflow for each character.
- X - When the program searches for a sequence of printable characters
- X it only examines every min_str_len-th character instead of every single one.
- X min_str_len defaults to 4 and can be set with command line option
- X like "strings -3".
- X
- X It can be sped up some more, but then it would be difficult to port
- X it to different systems.
- X Example:
- X Currently the program takes 6.0 seconds on MX2 for /vmsinix.
- X The improved version only needs 5.5 seconds. It is also much smaller:
- X 6976 bytes compared to 10596.
- X Ways to improve the program:
- X - On some machines another method to test whether a character is printable
- X will be faster. Now the program uses an array (isp), uses a character
- X cast to a (signed) integer as index into this array (isp_mid is the
- X base from which offsets are computed).
- X On MX2, and, if I believe my tests, on VAX, it is faster, to use
- X unsigned characters as index into this array.
- X If you want to play around with this, just change CHAR_TYPE to
- X 'unsigned byte' and define the macro IS_PRINTABLE accordingly to
- X '(isp[c])'
- X - It makes a difference (although a small one), what basic type you
- X choose for the isp array. On MX2 short is best, but char is nearly
- X as good.
- X - You can make it smaller. The program does not need stdio. But
- X exit normally closes file descriptors, and therefore includes a
- X large part of the stdio stuff. Well, about 4 K on some systems.
- X If you know what your exit does, you can substitute a suitable
- X routine of your own. E.g. on MX2, exit calls _cleanup, which
- X only closes all open file descriptors. As I know that only the
- X standard descriptors are open at the end of the program, I can
- X write a _cleanup which only does a close on 0, 1, and 2.
- X
- X The savings that you get are almost invisible, they are not easily
- X portable, but rather require a certain amount of research on part
- X of the person doing the porting. I chose not to fit the programs with
- X options to adjust these things.
- X
- X There are still some DEBUG statements in the code. You get them
- X if you set DEBUGFLAGS=-DDEBUG.
- X
- X6.) Status
- X
- X This program is placed into the public domain.
- X The Copyright Notice in COPYING applies.
- X
- XAbsorb, apply and enjoy,
- X Michael Greim
- END_OF_FILE
- if test 7601 -ne `wc -c <'README'`; then
- echo shar: \"'README'\" unpacked with wrong size!
- fi
- # end of 'README'
- fi
- if test -f 'config.h' -a "${1}" != "-c" ; then
- echo shar: Will not clobber existing file \"'config.h'\"
- else
- echo shar: Extracting \"'config.h'\" \(2765 characters\)
- sed "s/^X//" >'config.h' <<'END_OF_FILE'
- X/*
- X * This file is used within strings.
- X * You can add specific defines for a machine below. Define neither
- X * SAFETY_FIRST nor USE_USER_DEFINES
- X * Defines for your current machine should go in tune.h. Define
- X * USE_USER_DEFINES.
- X *
- X * sccsid : @(#) config.h (v6.5 5/22/89)
- X *
- X * FOUND will contain a string telling what defines have been used.
- X */
- X
- X/*
- X * Select the parameter set: user defines from tune.h, or the defaults of
- X * the first known machine whose predefined symbols match. If SAFETY_FIRST
- X * is defined, or no machine matches, FOUND stays undefined and the
- X * minimal defaults below are used.
- X */
- X# ifndef SAFETY_FIRST
- X# ifdef USE_USER_DEFINES
- X# define FOUND "Using the user defines."
- X# include "tune.h"
- X# else /* USE_USER_DEFINES */
- X /*
- X * Add (all) defines for your machine here.
- X */
- X# if defined(nsc32000) && defined(sinix) && defined(ns16000)
- X# define FOUND "Using defaults for sinix MX2."
- X# define FCNTL 2
- X# define FAST_COPY(from,to,count) bcopy(from,to,count)
- X# define WHAT_LSEEK 1
- X# define I_SPECIAL
- X# define IN_BUF_LEN 1024
- X# define OUT_BUF_LEN 8192
- X# define THRESHOLD 1024
- X# endif /* nsc32000 && sinix && ns16000 */
- X# if defined(unix) && defined(vax) && !defined(ultrix)
- X# define FOUND "Using defaults for VAX (BSD?)."
- X# define FCNTL 1
- X# define FAST_COPY(from,to,count) bcopy(from,to,count)
- X# define WHAT_LSEEK 1
- X# define I_SPECIAL
- X# define IN_BUF_LEN 4096
- X# define OUT_BUF_LEN 16384
- X# define THRESHOLD 4096
- X# endif /* unix && vax && !ultrix */
- X# if defined(unix) && defined(sun) && defined(mc68020)
- X# define FOUND "Using defaults for SUN (SUN OS 3.*)."
- X# define FCNTL 1
- X# define FAST_COPY(from,to,count) bcopy(from,to,count)
- X# define WHAT_LSEEK 1
- X# define I_SPECIAL
- X# define IN_BUF_LEN 4096
- X# define OUT_BUF_LEN 16384
- X# define THRESHOLD 4096
- X# endif /* unix && sun && mc68020 */
- X# if defined(unix) && defined(ultrix) && defined(bsd4_2)
- X# define FOUND "Using defaults for VAX (ULTRIX 2.0)"
- X# define FCNTL 1
- X# define FAST_COPY(from,to,count) bcopy(from,to,count)
- X# define WHAT_LSEEK 1
- X# define I_SPECIAL
- X# define IN_BUF_LEN 4096
- X# define OUT_BUF_LEN 16384
- X# define THRESHOLD 4096
- X# endif /* unix && ultrix && bsd4_2 */
- X# if defined(vms) && defined(vax)
- X# define FOUND "Using defaults for VAX (VMS)"
- X/* don't define FCNTL, O_RDONLY flag is in file.h and is 0 */
- X# define FAST_COPY(from,to,count) memcpy(to,from,count)
- X# define WHAT_LSEEK 2
- X/* don't define I_SPECIAL */
- X# define IN_BUF_LEN 4096
- X# define OUT_BUF_LEN 16384
- X# define THRESHOLD 4096
- X# endif /* vms && vax */
- X# endif /* USE_USER_DEFINES */
- X# endif /* SAFETY_FIRST */
- X
- X# ifndef FOUND
- X/*
- X * We have to use the defaults: either SAFETY_FIRST was defined or none
- X * of the known machines matched.
- X */
- X# define FOUND "Using minimal defaults."
- X/* don't define FCNTL */
- X/* FAST_COPY will be a real routine */
- X# define WHAT_LSEEK 1
- X/* don't define I_SPECIAL */
- X# define IN_BUF_LEN 1024
- X# define OUT_BUF_LEN 8192
- X# define THRESHOLD 1024
- X
- X# endif /* FOUND */
- END_OF_FILE
- if test 2765 -ne `wc -c <'config.h'`; then
- echo shar: \"'config.h'\" unpacked with wrong size!
- fi
- # end of 'config.h'
- fi
- if test -f 'limits.c' -a "${1}" != "-c" ; then
- echo shar: Will not clobber existing file \"'limits.c'\"
- else
- echo shar: Extracting \"'limits.c'\" \(1183 characters\)
- sed "s/^X//" >'limits.c' <<'END_OF_FILE'
- Xstatic char * sccsid = "@(#) limits.c (v6.2 5/19/89)";
- X
- X# include "strings.h"
- X
- X# ifdef I_SPECIAL
- X
- X# ifdef sun
- X# include <sys/exec.h>
- X# endif /* sun */
- X# include <a.out.h>
- X
- Xget_limits (fd, first, last)
- Xint fd;
- XLSEEK_TYPE * first, * last;
- X/*
- X * Have a look into the input file open under fd.
- X * If we think it is an object file, then get the start and
- X * size of initialized data and set
- X * *first : address where examination of file will start
- X * *last : first address which will not be examined.
- X * If fd is not seekable, the file is shorter than a struct exec or the
- X * magic number is bad, *first stays 0 and *last stays -1.
- X */
- X{
- X    register int i;
- X    struct exec e;
- X    LSEEK_TYPE l;
- X
- X    *first = (LSEEK_TYPE)0;
- X    *last = (LSEEK_TYPE)(-1);
- X    if (lseek (fd, (LSEEK_TYPE)0, 0) != (LSEEK_TYPE)0) {
- X# ifdef DEBUG
- X        fprintf (prot, "Input not seekable\n");
- X# endif
- X        return;
- X    }
- X    i = read (fd, (char*)(&e), sizeof (e));
- X    if (i != sizeof(e)) {
- X# ifdef DEBUG
- X        fprintf (prot, "file too small\n");
- X# endif
- X        return;
- X    }
- X    if (N_BADMAG(e)) {
- X# ifdef DEBUG
- X        fprintf (prot, "bad magic\n");
- X# endif
- X        return;
- X    }
- X    /* initialized data follows the text segment in the file */
- X    l = N_TXTOFF(e) + e.a_text;
- X# ifdef DEBUG
- X    /* report the computed offset l, not the byte count i of the read */
- X    fprintf (prot, "start of initialized data at %ld\n", (long)l);
- X    fprintf (prot, "length = %1ld\n", (long)(e.a_data));
- X# endif
- X    *first = l;
- X    *last = l + e.a_data;
- X}
- X
- X# endif /* I_SPECIAL */
- END_OF_FILE
- if test 1183 -ne `wc -c <'limits.c'`; then
- echo shar: \"'limits.c'\" unpacked with wrong size!
- fi
- # end of 'limits.c'
- fi
- if test -f 'output.c' -a "${1}" != "-c" ; then
- echo shar: Will not clobber existing file \"'output.c'\"
- else
- echo shar: Extracting \"'output.c'\" \(5953 characters\)
- sed "s/^X//" >'output.c' <<'END_OF_FILE'
- Xstatic char * sccsid = "@(#) output.c (v6.5 5/19/89)";
- X/*
- X * Output routines for 'strings'.
- X * We roll our own here, because stdio either expects NUL terminated
- X * strings, which we don't have, or tests for buffer overflow at each
- X * character. But we know how many characters will be added to the
- X * output block, so we can make do with two or three tests per sequence
- X * of printable characters.
- X *
- X * Sequences are stuffed into the output buffer, even temporarily when
- X * we could not decide what to do with them, yet had to remember the
- X * characters. When a temporary sequence is made permanent or a
- X * permanent sequence added, and the new length of data in the output
- X * buffer exceeds a threshold, it is output.
- X * If the buffer overflows in between it is extended with a call to
- X * realloc. This rarely ever happens, but if it happens, then
- X * you will rather have strings to be slowing down than dump core.
- X */
- X
- X# include "strings.h"
- X
- X/*
- X * The characters in out_buf until num_out_buf are already accepted.
- X * level points at the end of all characters in out_buf, even the
- X * temporarily saved ones.
- X * saved is the number of characters temporarily saved.
- X * Note that level and saved are not the same. If a file name or an offset
- X * precedes a sequence, then out_buf+num_out_buf+saved != level !!
- X * Note that sometimes I have to compute the number of characters between
- X * out_buf and level. I do this by subtracting the two pointers. I assume
- X * that the result is int or convertible to int, but I don't have the
- X * authority at hand to prove this.
- X */
- XCHAR_TYPE * out_buf;
- Xint num_out_buf = 0; /* numbers of chars in out_buf */
- XCHAR_TYPE * level;
- Xint buf_len;
- Xint saved = 0;
- X
- Xextern int ind_offset;
- Xextern int ind_prefix;
- Xextern CHAR_TYPE buf[];
- Xextern LSEEK_TYPE offset;
- Xextern char * cur_file_name;
- X
- Xextern char * malloc ();
- Xextern char * realloc ();
- X
- Xinit_output ()
- X/*
- X * Allocate the output buffer with room for OUT_BUF_LEN characters and
- X * reset the bookkeeping (level, num_out_buf, buf_len).
- X * If malloc fails, a message is written to file descriptor 2 and the
- X * program exits, just like make_room does when realloc fails.
- X */
- X{
- X    out_buf = (CHAR_TYPE *) malloc (sizeof (CHAR_TYPE) * OUT_BUF_LEN);
- X    if (out_buf == NULL) { /* panic */
- X        (void)write (2, "MALLOC ERROR.\n", 14);
- X        exit (1);
- X    }
- X    level = out_buf;
- X    num_out_buf = 0;
- X    buf_len = OUT_BUF_LEN;
- X}
- X
- Xflush_output ()
- X/*
- X * Num_out_buf characters from the buffer are written to file
- X * descriptor 1 and num_out_buf is reset to 0.
- X * If write does not report exactly num_out_buf characters, a message
- X * is printed with out and the program exits with status 1.
- X * level is reset to the start of the buffer in any case.
- X */
- X{
- X if (num_out_buf > 0) {
- X# ifdef DEBUG
- X fprintf (prot, "flush_output : write %d chars\n", num_out_buf);
- X# endif
- X /* a short write is treated like a failed one */
- X if (write (1, out_buf, num_out_buf) != num_out_buf) {
- X out ("PANIC : write error\n");
- X exit (1);
- X }
- X num_out_buf = 0;
- X }
- X level = out_buf;
- X}
- X
- Xstatic
- Xmake_room (n)
- Xregister int n;
- X/*
- X * Must reallocate.
- X * Buffer is enlarged by at least IN_BUF_LEN characters. The largest
- X * piece which has to be put into the buffer will be at most IN_BUF_LEN
- X * characters. If there is a sequence of puts, the buffer will be enlarged
- X * each time. This should happen very rarely.
- X * n is the number of additional characters the caller needs; the buffer
- X * grows by n or by IN_BUF_LEN, whichever is larger.
- X * If realloc fails, a message is written to file descriptor 2 and the
- X * program exits with status 1.
- X */
- X{
- X register int i;
- X# ifdef DEBUG
- X fprintf (prot, "make_room :: REALLOC realloc called.\n");
- X fflush (prot);
- X# endif
- X /*
- X * Level is a pointer into out_buf. We have to save it here and
- X * restore it later, because realloc might cause a copying of the
- X * contents of out_buf to a new address.
- X */
- X i = level - out_buf;
- X buf_len += n>IN_BUF_LEN?n:IN_BUF_LEN;
- X# ifdef DEBUG
- X fprintf (prot, "make_room :: output buffer will be enlarged to %d\n", buf_len);
- X# endif
- X /*
- X * NOTE(review): init_output allocates sizeof (CHAR_TYPE) * OUT_BUF_LEN
- X * bytes, here buf_len is passed unscaled. This presumably relies on
- X * sizeof (CHAR_TYPE) being 1 - confirm against strings.h.
- X */
- X out_buf = (CHAR_TYPE *) realloc (out_buf, (unsigned int)buf_len);
- X if (out_buf == NULL) { /* panic */
- X (void)write (2, "REALLOC ERROR.\n", 15);
- X exit (1);
- X }
- X level = out_buf + i;
- X}
- X
- Xvoid
- Xadd_cur_file_name ()
- X/*
- X * Only called if command line flag '-p' was specified.
- X * Add the name of the current input file, followed by a ':', to the
- X * output buffer at level. Does nothing if cur_file_name is NULL.
- X */
- X{
- X register int i;
- X
- X if (cur_file_name == NULL)
- X return;
- X i = strlen (cur_file_name);
- X /*
- X * Is there space for (i+1) characters (name plus ':') at the end of
- X * the output buffer? If not, enlarge it.
- X */
- X if ((int)(level - out_buf) + i + 1 > buf_len)
- X make_room (i+1);
- X FAST_COPY (cur_file_name, level, i);
- X level += i;
- X *level++ = ':';
- X}
- X
- Xappend (b1, b2, ind_perm)
- Xregister CHAR_TYPE * b1, * b2;
- Xint ind_perm;
- X/*
- X * Append the characters from b1 up to (but not including) b2 to the
- X * output buffer and set the pointers.
- X * If ind_perm is 1 the sequence is made permanent: a LF is added, all
- X * characters in the buffer are accepted, and if there are more than
- X * THRESHOLD characters they are written.
- X * Otherwise the characters are only saved temporarily.
- X */
- X{
- X    register int n;
- X
- X    /*
- X     * If the user wants it and this is the first part of a sequence,
- X     * add name of current file as prefix.
- X     */
- X    if (ind_prefix && saved == 0)
- X        add_cur_file_name ();
- X    /*
- X     * If there are no saved characters and the user wants offsets,
- X     * then we have to add an offset first.
- X     */
- X    if (ind_offset && saved == 0)
- X        output_offset (b1);
- X    /*
- X     * Is there enough space in out_buf for the sequence plus the LF ?
- X     * Ask make_room for n+1 characters, not n: if the buffer is exactly
- X     * full and n >= IN_BUF_LEN, growing by n alone would leave no room
- X     * for the LF.
- X     */
- X    n = b2 - b1;
- X    if ((int)(level - out_buf) + n + 1 > buf_len)
- X        make_room (n + 1);
- X    /*
- X     * Copy the sequence. It may be empty, so don't forget to add a LF here.
- X     */
- X    if (n > 0) {
- X        FAST_COPY (b1, level, n);
- X        level += n;
- X    }
- X    if (ind_perm == 1) {
- X        *level++ = '\n';
- X        /*
- X         * There may be some temporarily saved characters in out_buf. Make
- X         * them permanent.
- X         */
- X        num_out_buf = level - out_buf;
- X        /*
- X         * If necessary, write.
- X         */
- X        if (num_out_buf > THRESHOLD)
- X            flush_output ();
- X        saved = 0;
- X    } else
- X        saved = n;
- X}
- X
- Xoutput_offset (b1)
- XCHAR_TYPE * b1;
- X/*
- X * Compute the file offset of b1 (offset plus the position of b1 within
- X * buf), convert it to its character representation in decimal and append
- X * this, followed by a blank, at the end of the output buffer.
- X * Normally we output 7 characters, padded with blanks on the left.
- X * If the number needs more we expand.
- X */
- X{
- X    register int i;
- X    register LSEEK_TYPE l;
- X    register LSEEK_TYPE rest;
- X    CHAR_TYPE * b;
- X
- X    l = offset + (int)(b1 - buf);
- X    /*
- X     * How many characters must we output ?
- X     * Count by dividing l down instead of multiplying a power of ten up,
- X     * so that nothing can overflow for very large offsets.
- X     */
- X    for (i = 7, rest = l / 10000000L; rest > 0; rest /= 10, i++);
- X
- X    /*
- X     * Is there space for (i+1) characters at the end of the output buffer?
- X     * If not, enlarge it.
- X     */
- X    if ((int)(level - out_buf) + i+1 > buf_len)
- X        make_room (i+1);
- X
- X    b = level;
- X    level += i+1;
- X
- X    /* the blank which separates the number from the string */
- X    b[i--] = ' ';
- X    /*
- X     * Convert the number, last digit first.
- X     */
- X    do {
- X        b[i--] = '0' + l % 10;
- X        l /= 10;
- X    } while (l != 0);
- X    /*
- X     * Add some blanks in front of number
- X     */
- X    for (; i >= 0; i--)
- X        b [i] = ' ';
- X}
- X
- X# ifdef DEBUG
- X/*
- X * Debug version of 'out': write the NUL terminated string s to stderr
- X * using stdio. s must be declared as a string and written with fputs;
- X * fputc would take the pointer for a single character.
- X */
- Xout (s)
- Xregister char * s;
- X{
- X    fputs (s, stderr);
- X}
- X# else /* DEBUG */
- X/*
- X * The main program calls a routine called 'out'.
- X * Write the NUL terminated string s to file descriptor 2.
- X */
- Xout (s)
- Xregister char * s;
- X{
- X    (void)write (2, s, strlen(s));
- X}
- X# endif /* DEBUG */
- END_OF_FILE
- if test 5953 -ne `wc -c <'output.c'`; then
- echo shar: \"'output.c'\" unpacked with wrong size!
- fi
- # end of 'output.c'
- fi
- if test -f 'strings.1' -a "${1}" != "-c" ; then
- echo shar: Will not clobber existing file \"'strings.1'\"
- else
- echo shar: Extracting \"'strings.1'\" \(4863 characters\)
- sed "s/^X//" >'strings.1' <<'END_OF_FILE'
- X.TH STRINGS 1 "5/22/89"
- X.SH NAME
- Xstrings - find and output the printable strings in a file
- X.SH SYNOPSIS
- X.B strings
- X[
- X.B -option
- X] [
- X.B filename
- X] ...
- X.br
- Xoption is one of a,-,o,p,t,v,e,c or a number.
- X.SH DESCRIPTION
- X.I strings
- Xlooks for sequences of printable characters in a file and outputs them, if
- Xthey are at least \fInumber\fR characters long. Default for \fInumber\fR is 4.
- X.br
- XIf
- X.I strings
- Xidentifies a file as containing an object, it will only scan the initialized
- Xdata segment of the object. If you specified either the "-a" or "-" flags
- Xit will scan the whole file.
- X.br
- XIf there is more than one filename specified, the name of the file
- Xis output before the stuff from that file.
- X.br
- XIf there is no filename specified \fIstrings\fR will read from standard input.
- X.SH OPTIONS
- X.B \-
- X.B \-a
- X'in +3
- Xexamine the entire file for sequences of printable characters. Default on
- XUNIX (trademark of AT&T) systems is to examine only the segment of
- Xinitialized data. On systems where such a segment does not exist, the
- Xoptions "-a" and "-" do not exist.
- X.in -3
- X.B \-o
- X'in +3
- Xprecede each output string by its offset from the
- Xbeginning of the file (in decimal).
- XThe number is output with (at least) 7 digits, filled with
- Xblanks. Then follows a blank, then the found string.
- XIf these 7 digits are not sufficient to hold the offset, \fIstrings\fR will use
- X8 digits or as many as necessary (and possible :-).
- X.in -3
- X.B number
- X'in +3
- Xonly output sequences longer than or as long as \fBnumber\fR. Only decimal
- Xvalues are accepted.
- X.in -3
- X.B \-p
- X'in +3
- Xprefix each output with the name of the input file. This is useful e.g. if
- X\fIstrings\fR is used on a list of files and \fIfgrep\fR is used to
- Xsearch for a certain string. When this string is found you will get
- Xthe name of the file the string was found in.
- X.in -3
- X.B \-e
- X'in +3
- Xthe next argument is interpreted as a filename, even if it starts with a "-".
- XThus "strings -e -a" will try to read from file "-a".
- X.in -3
- X.B \-t
- X'in +3
- XTAB is considered a printable character.
- X.in -3
- X.B \-c
- X'in +3
- Xoutput only strings which end with a NUL or LF character. Most strings
- Xin C programs will result in strings like this.
- X.in -3
- X.B \-v
- X'in +3
- Xprint information about \fIstrings\fR version, including the flags with
- Xwhich it was compiled. This may be uninteresting to the normal user,
- Xbut is quite informative when porting to another system. Nothing
- Xelse is output, the program exits immediately afterwards.
- X.in -3
- X.br
- X.LP
- XOptions can be specified in a single word. They apply to all files specified.
- XIt is an error for an option to be specified twice.
- XThe following invocations
- Xare correct:
- X.sp
- X.in +4
- Xstrings -ao12 m1 m2 m3
- X.br
- Xstrings -a -ot -12 m1
- X.br
- Xstrings -a m1 -o m2
- X.br
- Xstrings - -o
- X.br
- Xstrings core -a blabla -o rhabarber -12
- X.in -4
- X.sp
- XThe following invocations are incorrect:
- X.sp
- X.in +4
- Xstrings -a - BBB
- X.br
- Xstrings -aa m
- X.br
- Xstrings -12a14 core
- X.br
- X.in -4
- X.SH AUTHOR
- XMichael Greim
- X.sp
- Xgreim@sbsvax.uucp
- X.br
- Xgreim@sbsvax.informatik.uni-saarland.dbp.de
- X.SH DIAGNOSTICS
- XDiagnostics are always written to file descriptor 2, i.e. if a file cannot
- Xbe opened or malloc runs out of memory.
- X.br
- XIf the user makes an error when invoking \fIstrings\fR, a table with
- Xthe correct syntax will be output.
- X.br
- XIf the output buffer overflows, the program tries to enlarge it using
- Xrealloc. If this fails, it prints a message and exits
- Ximmediately.
- X.SH BUGS
- X.br
- X\fIStrings\fR does not inform the user, whether it has found an object file
- Xor not.
- X.sp
- XIf you have a file which contains a long sequence of printable characters,
- Xe.g. one about as large as your computer's memory,
- X\fIstrings\fR might run out of memory. It will print "REALLOC ERROR." and
- Xexit.
- X.sp
- XIf a file contains a magic number for object files, \fIstrings\fR thinks
- Xit has found an object file and interprets the bytes at a certain location
- Xas offsets into the file. This may lead to errors, but \fIstrings\fR
- Xdoes not tell the user about it.
- X.sp
- XThe way to identify an object file, or even the concept of initialized
- Xdata may vary with systems. When \fIstrings\fR is compiled one can turn
- Xoff this special feature (I_SPECIAL). Consequently the options
- X\fI-a\fR and \fI-\fR
- Xare no longer valid. Invoke \fIstrings\fR with an invalid option to
- Xget a list of possible options.
- X.sp
- XIf \fIstrings\fR is invoked on the standard input it fills up its buffer
- Xbefore it outputs anything. If you enter a line at a time, it
- Xtakes some time before you see the first output.
- X.sp
- XIf \fIstrings\fR is invoked on the standard input, it does not try to
- Xrecognize an object file, but acts as if the flags \fI-a\fR or \fI-\fR
- Xhad been set.
- X.br
- XThe option "c" will not get all C strings. If you use something like
- X.nf
- X printf ("Say something\\r\\n");
- X.fi
- Xand you specify "c" when invoking \fIstrings\fR
- Xyou will not get the string, because it does not end with NUL or LF.
- END_OF_FILE
- if test 4863 -ne `wc -c <'strings.1'`; then
- echo shar: \"'strings.1'\" unpacked with wrong size!
- fi
- # end of 'strings.1'
- fi
- if test -f 'strings.c' -a "${1}" != "-c" ; then
- echo shar: Will not clobber existing file \"'strings.c'\"
- else
- echo shar: Extracting \"'strings.c'\" \(13637 characters\)
- sed "s/^X//" >'strings.c' <<'END_OF_FILE'
- Xstatic char * sccsid = "@(#) strings.c (v1.6.9 5/23/89)";
- X/*
- X * strings
- X * =======
- X *
- X * Find and output the printable strings with a certain minimal length or
- X * more in any files.
- X *
- X * This is a rewrite of 4BSD strings, which had some errors:
- X * - ^L is a printable character (4.3BSD, SUN OS)
- X * - 0x80 is a printable character (4.3BSD, SUN OS)
- X * - on some systems (MX2) it does not get the segment of initialized data
- X * correctly
- X *
- X * A printable string is any string of printable characters. A printable
- X * character is any obvious one, plus blank. If flag -t was specified TAB
- X * is considered a printable character too. If flag -c was specified
- X * on the command line, only strings are output, which are followed
- X * by LF or NUL, just like the original BSD manual entry claimed.
- X *
- X */
- X
- X# include "strings.h"
- X
- X# include <ctype.h>
- X
- Xchar isp [256];
- Xchar * isp_mid;
- X# define IS_PRINTABLE(c) (isp_mid[c])
- X
- Xint fd;
- X
- XCHAR_TYPE buf [IN_BUF_LEN];
- X
- Xint ind_whole = 0; /* Examine the whole file ? Set by -a or - in options () */
- Xint ind_offset = 0; /* Offsets required ? Set by -o */
- Xint min_str_len = 4; /* A string with at least this many characters is output; set by -N */
- Xint ind_file = 0; /* # of filenames collected from the command line by options () */
- Xint ind_c = 0; /* Only strings followed by NUL or LF ? Set by -c */
- Xint ind_tab = 0; /* Is TAB a printable character ? Set by -t */
- Xint ind_prefix = 0; /* Should the filename be added before each string ? Set by -p */
- Xint ind_version = 0; /* Print version information only ? Set by -v */
- X
- XLSEEK_TYPE offset;
- X
- Xchar * cur_file_name;
- X
- Xextern CHAR_TYPE * out_buf, * level;
- Xextern int num_out_buf;
- Xextern int saved;
- X
- X# define SEARCH 1
- X# define DECIDE 2
- X# define TRY 3
- X
- Xusage (n)
- Xint n;
- X/*
- X * Write the summary of the command line syntax with out () and leave
- X * the program with exit status n. This routine does not return.
- X * Which lines are shown depends on whether I_SPECIAL was defined at
- X * compile time.
- X */
- X{
- X    static char * text [] = {
- X        "usage: strings [options] [file ...]\n",
- X# ifdef I_SPECIAL
- X        "-a : look in whole file. Default : only look in initialized data\n",
- X        "- : the same as -a\n",
- X# endif
- X        "-o : output offset in decimal before each string.\n",
- X        "-N : only output sequences of length >= N; N is a number > 0.\n",
- X        "-c : only output C strings; terminated by \\0 or \\n\n",
- X        "-e : the next word is taken as a filename, even if it starts with '-'.\n",
- X        "-t : TAB is considered a printable character too.\n",
- X        "-p : the name of the input file is output before each string.\n",
- X        "-v : only print version information, examine no files.\n",
- X# ifdef I_SPECIAL
- X        "Options can be combined like '-a20ot'.\n",
- X# else
- X        "Options can be combined like '-20o'.\n",
- X        "The whole file is scanned.\n",
- X# endif
- X        (char *) 0 /* end marker */
- X    };
- X    register char ** line;
- X
- X    /* Emit the lines one by one, in the order of the table. */
- X    for (line = text; *line != (char *) 0; line++)
- X        out (*line);
- X    exit (n);
- X}
- X
- Xout_int (n)
- Xregister int n;
- X/*
- X * Write the decimal representation of n with out ().
- X * The digits are produced from the least significant end into a local
- X * buffer, which is filled from the back.
- X * Negative values are written with a leading '-'. The magnitude is
- X * handled as an unsigned number, so that the most negative int is
- X * converted correctly as well.
- X */
- X{
- X    char s [24]; /* room for the sign, the digits of a 64 bit int and NUL */
- X    register char * p;
- X    register unsigned int u;
- X    int negative;
- X
- X    negative = (n < 0);
- X    if (negative)
- X        u = (unsigned int) 0 - (unsigned int) n;
- X    else
- X        u = (unsigned int) n;
- X
- X    p = s + sizeof (s) - 1;
- X    *p = '\0';
- X    /* do ... while, so that 0 yields the single digit "0" */
- X    do {
- X        *--p = '0' + (char) (u % 10);
- X        u /= 10;
- X    } while (u != 0);
- X    if (negative)
- X        *--p = '-';
- X    out (p);
- X}
- X
- Xprint_version()
- X/*
- X * Write the version string (sccsid) and the values of the compile time
- X * flags with out (). This is what option -v shows.
- X * The names printed are the names of the macros themselves, so that
- X * they can be looked up in the configuration files.
- X */
- X{
- X    out (sccsid); out ("\n\n");
- X    out ("Compilation flags:\n");
- X    out ("FOUND = \""); out (FOUND); out ("\"\n");
- X# ifdef FCNTL
- X    out ("FCNTL = "); out_int (FCNTL); out ("\n");
- X# else
- X    out ("FCNTL is not defined.\n");
- X# endif /* FCNTL */
- X# ifdef FAST_COPY
- X    out ("FAST_COPY is defined.\n");
- X# else
- X    out ("FAST_COPY is not defined.\n");
- X# endif /* FAST_COPY */
- X# ifdef WHAT_LSEEK
- X    out ("WHAT_LSEEK = "); out_int (WHAT_LSEEK); out ("\n");
- X# else
- X    out ("WHAT_LSEEK is not defined.\n");
- X# endif /* WHAT_LSEEK */
- X# ifdef I_SPECIAL
- X    out ("I_SPECIAL is defined.\n");
- X# else
- X    out ("I_SPECIAL is not defined.\n");
- X# endif /* I_SPECIAL */
- X    /* The labels match the macro names IN_BUF_LEN and OUT_BUF_LEN. */
- X    out ("IN_BUF_LEN = "); out_int (IN_BUF_LEN); out ("\n");
- X    out ("OUT_BUF_LEN = "); out_int (OUT_BUF_LEN); out ("\n");
- X    out ("THRESHOLD = "); out_int (THRESHOLD); out ("\n");
- X}
- X
- Xoptions (ac, av, f)
- Xint ac;
- Xchar * av [];
- Xchar ** f;
- X/*
- X * Parse the command line av [1] .. av [ac-1].
- X * A word starting with '-' is a word of options, unless the preceding
- X * option word contained the "next word is a filename" option. Every
- X * other word is stored in f as a filename and counted in ind_file, so
- X * f must have room for one pointer per command line word.
- X * A word consisting only of "-" sets ind_whole.
- X * The options set the global indicators ind_* and min_str_len.
- X * An option given twice, an unknown option or a minimal length <= 0
- X * ends the program via usage ().
- X *
- X * The "next word is a filename" option is documented as -e in the
- X * usage text and in the manual. -f is accepted as a synonym, because it
- X * is the letter this routine used to recognize.
- X */
- X{
- X    register int i, j;
- X    int take_file = 0; /* is the next word a file ? */
- X    int had_whole, had_offset, had_min_str_len;
- X    int had_c, had_tab, had_prefix, had_version;
- X
- X    had_whole = had_offset = had_min_str_len = had_c = had_tab = 0;
- X    had_prefix = had_version = 0;
- X    for (i = 1; i < ac; i++) {
- X        if (take_file == 0 && av[i][0] == '-') {
- X            if (av[i][1] == '\0') {
- X                /* A single "-" : the same as -a */
- X                if (had_whole == 1)
- X                    usage (5);
- X                had_whole = 1;
- X                ind_whole = 1;
- X            } else {
- X                for (j = 1; av[i][j] != '\0'; j++) {
- X                    if ('0' <= av[i][j] && av[i][j] <= '9') {
- X                        /* A run of digits is the minimal string length. */
- X                        if (had_min_str_len == 1)
- X                            usage (3);
- X                        for (min_str_len = 0; '0' <= av[i][j] && av[i][j] <= '9'; j++)
- X                            min_str_len = min_str_len * 10 + av[i][j] - '0';
- X                        j--; /* So we don't lose a character */
- X                        had_min_str_len = 1;
- X                    } else
- X                        switch (av[i][j]) {
- X# ifdef I_SPECIAL
- X                        case 'a':
- X                        case '-':
- X                            if (had_whole == 1)
- X                                usage (5);
- X                            had_whole = 1;
- X                            ind_whole = 1;
- X                            break;
- X# endif
- X                        case 'o':
- X                            if (had_offset == 1)
- X                                usage (6);
- X                            had_offset = ind_offset = 1;
- X                            break;
- X                        case 'c':
- X                            if (had_c == 1)
- X                                usage (7);
- X                            had_c = ind_c = 1;
- X                            break;
- X                        case 'e': /* the documented letter */
- X                        case 'f': /* kept for compatibility */
- X                            take_file = 1;
- X                            break;
- X                        case 't':
- X                            if (had_tab == 1)
- X                                usage (8);
- X                            had_tab = ind_tab = 1;
- X                            break;
- X                        case 'p':
- X                            if (had_prefix == 1)
- X                                usage (9);
- X                            had_prefix = ind_prefix = 1;
- X                            break;
- X                        case 'v':
- X                            if (had_version == 1)
- X                                usage (10);
- X                            had_version = ind_version = 1;
- X                            break;
- X                        default:
- X                            usage (8);
- X                            break;
- X                        }
- X                }
- X            }
- X        } else {
- X            /* A filename; the "next word is a file" request is used up. */
- X            f [ind_file++] = av[i];
- X            take_file = 0;
- X        }
- X    }
- X    if (min_str_len <= 0)
- X        usage (4);
- X# ifdef DEBUG
- X    fprintf (prot, "ind_offset = %3d\n", ind_offset);
- X    fprintf (prot, "ind_whole = %3d\n", ind_whole);
- X    fprintf (prot, "ind_file = %3d\n", ind_file);
- X    fprintf (prot, "ind_c = %3d\n", ind_c);
- X    fprintf (prot, "min_str_len = %3d\n", min_str_len);
- X    fprintf (prot, "ind_tab = %3d\n", ind_tab);
- X    fprintf (prot, "ind_prefix = %3d\n", ind_prefix);
- X    fprintf (prot, "ind_version = %3d\n", ind_version);
- X    if (ind_file == 0)
- X        fprintf (prot, "had no files on command line\n");
- X    else {
- X        fprintf (prot, "had %1d files on command line\n", ind_file);
- X        for (i = 0; i < ind_file; i++)
- X            fprintf (prot, "%s\n", f[i]);
- X    }
- X# endif
- X}
- X
- Xinit ()
- X/*
- X * Build the table of printable characters and initialize the output.
- X *
- X * The table isp is accessed through isp_mid, which is shifted by the
- X * smallest value a char can take, so that IS_PRINTABLE can be indexed
- X * directly with a char, even if char is a signed type.
- X * A character is printable if it is ASCII and isprint () accepts it;
- X * TAB is added if ind_tab is set.
- X */
- X{
- X    register int code;
- X    int lowest;
- X    char ch;
- X
- X    /* Find the smallest value which results from storing 0..255 in a char. */
- X    lowest = 0;
- X    code = 0;
- X    while (code < 256) {
- X        ch = code;
- X        if (ch < lowest)
- X            lowest = ch;
- X        code++;
- X    }
- X    isp_mid = isp - lowest;
- X
- X    /* Classify every character value. */
- X    code = 0;
- X    while (code < 256) {
- X        ch = code;
- X        if (isascii(ch) && isprint(ch))
- X            isp_mid [ch] = 1;
- X        else
- X            isp_mid [ch] = 0;
- X        code++;
- X    }
- X    if (ind_tab)
- X        isp_mid['\t'] = 1;
- X
- X    init_output ();
- X}
- X
- Xmain (argc, argv)
- Xint argc;
- Xchar * argv[];
- X/*
- X * Entry point.
- X * Parses the command line with options (). If -v was given, only the
- X * version information is printed. Otherwise every file named on the
- X * command line is scanned with strings (), or the standard input if no
- X * file was named. If more than one file was named, each file is
- X * announced with out_name ().
- X * Exits with status 0 on normal termination and with status 1 if the
- X * list of filenames cannot be allocated.
- X */
- X{
- X    register int i;
- X    char ** f;
- X
- X# ifdef DEBUG
- X    if ((prot = fopen ("prot", "w")) == NULL) {
- X        fprintf (stderr, "could not open prot\n");
- X        exit (1);
- X    }
- X# endif
- X    /*
- X     * One slot per command line word is always enough for the filenames.
- X     * Never ask for 0 bytes: malloc may then return NULL without being
- X     * out of memory.
- X     */
- X    f = (char **) malloc ((unsigned)(sizeof (char *) * (argc > 0 ? argc : 1)));
- X    if (f == NULL) {
- X        /* options () would store the filenames through a null pointer. */
- X        perror ("malloc");
- X        exit (1);
- X    }
- X    options (argc, argv, f);
- X    if (ind_version) {
- X        print_version ();
- X        exit (0);
- X    }
- X    init ();
- X
- X    if (ind_file == 0)
- X        strings ((char*)NULL);
- X    else
- X        for (i = 0; i < ind_file; i++) {
- X            if (ind_file != 1)
- X                out_name (f[i]);
- X            strings (f[i]);
- X        }
- X    exit (0);
- X}
- X
- Xout_name (b)
- Xregister CHAR_TYPE * b;
- X/*
- X * Append a header line for the file named b to the output:
- X * a rule of '-' characters, a blank, the name, a blank, and a second
- X * rule. The rules are shortened so that the line is at most 80
- X * characters wide, as long as the name leaves room for that; a name
- X * which is too long is output without rules.
- X * The last call to append () has flag 1, the others have flag 0.
- X */
- X{
- X    CHAR_TYPE s [45];
- X    CHAR_TYPE * s2 = (CHAR_TYPE*) " ";
- X    register int n, i;
- X
- X    /*
- X     * 44 dashes and a terminating NUL. Without the NUL the strlen ()
- X     * below would run beyond the end of the array.
- X     */
- X    for (i = 0; i < 44; i++)
- X        s [i] = '-';
- X    s [44] = '\0';
- X    n = strlen (s);
- X    i = strlen (b);
- X# ifdef DEBUG
- X    fprintf (prot, "out_name :: n = %d, i = %d\n", n, i);
- X# endif
- X    if (n*2 + (i+2) > 80)
- X        n = (80 - (i+2)) / 2;
- X    /*
- X     * A name with more than 78 characters leaves no room for a rule.
- X     * Never hand a negative length to append ().
- X     */
- X    if (n < 0)
- X        n = 0;
- X# ifdef DEBUG
- X    fprintf (prot, "out_name :: first string is %d long.\n", n);
- X# endif
- X    (void) append (s, s+n, 0);
- X
- X    (void) append (s2, s2+1, 0);
- X    (void) append (b, b+i, 0);
- X    (void) append (s2, s2+1, 0);
- X
- X    /* If the line would be 79 wide, make the second rule one longer. */
- X    if (2*n + (i+2) < 80)
- X        n++;
- X# ifdef DEBUG
- X    fprintf (prot, "out_name :: second string is %d long.\n", n);
- X# endif
- X    (void) append (s, s+n, 1);
- X}
- X
- Xint
- Xexamine (state, n)
- Xregister int state;
- Xint n;
- X/*
- X * Find strings of printable characters in buf and append them to
- X * the output buffer, if they meet certain conditions.
- X *
- X * The main part of this routine is a DFA (deterministic finite automaton) with
- X * three states.
- X * These states are
- X * SEARCH : search for a printable character by examining characters in
- X * distance min_str_len. If found, set b1 to the start
- X * of the sequence and enter state TRY.
- X * TRY : We have found a printable character. Set b2 to the first character
- X * after the end of the sequence by single stepping. Set state to
- X * DECIDE.
- X * DECIDE : We have found a sequence of printable characters. If the first
- X * character after the sequence is in the buffer, then we can decide
- X * what to do with the sequence (even if flag -c was not specified).
- X * If not then the stuff is buffered, state set to TRY, and returned
- X * to the caller to read a new block of input.
- X * It is tested whether the sequence meets the requirements.
- X * Either it is output by placing it permanently into the output
- X * buffer, or it is forgotten.
- X *
- X * Arguments:
- X * state : the state in which the automaton is entered. The caller passes
- X * SEARCH for the first block and afterwards the value returned
- X * by the previous call.
- X * n : the number of characters in buf which are to be examined.
- X *
- X * Return value: the state in which the next call has to be entered.
- X * SEARCH is returned if no sequence is pending. TRY is returned if a
- X * sequence reaches up to the end of the block; its characters have then
- X * been handed to append () with flag 0 and the decision is left to the
- X * next call. If n is not positive the loop is not entered and state is
- X * returned unchanged.
- X *
- X * A sequence is accepted if its length in this block plus the global
- X * 'saved' is at least min_str_len and, if ind_c is set, the character
- X * following it is NUL or LF. It is then handed to append () with flag 1.
- X * A refused sequence is dropped by setting level back to
- X * out_buf + num_out_buf and saved to 0.
- X */
- X{
- X register CHAR_TYPE * b, * b1, * b2, * end;
- X
- X end = buf + n;
- X
- X b = b1 = b2 = buf;
- X for (;b < end; b = b2+1) {
- X# ifdef DEBUG
- X fprintf (prot, "state = %s; b at %d\n",
- X state == SEARCH ? "SEARCH" : (state == DECIDE ? "DECIDE" : "TRY"),
- X (int)(b - buf));
- X# endif
- X b1 = b;
- X switch (state) {
- X case SEARCH:
- X /*
- X * Search a character which might be in a sequence of
- X * printable characters. Note that it suffices to examine
- X * characters in distance min_str_len.
- X */
- X for (;b2 < end && !IS_PRINTABLE(*b2); b2 += min_str_len);
- X /*
- X * If we have stepped outside the buffer, we must examine
- X * the end of the buffer yet.
- X */
- X if (b2 >= end)
- X b2 = end;
- X b1 = b2-1;
- X /*
- X * Find the start of the current sequence.
- X */
- X while (b1 >= buf && IS_PRINTABLE(*b1))
- X b1--;
- X b1++;
- X if (b1 >= end)
- X return (SEARCH);
- X /* FALL THROUGH */
- X case TRY:
- X /*
- X * Find the end of the current sequence. Set b2 one beyond.
- X */
- X while (b2 < end && IS_PRINTABLE(*b2))
- X b2++;
- X# ifdef DEBUG
- X fprintf (prot, "found seq between %1d and %1d -->",
- X (int)(b1-buf), (int)(b2-buf));
- X { CHAR_TYPE * tmp;
- X for (tmp = b1; tmp < b2; tmp++)
- X if (IS_PRINTABLE(*tmp))
- X fputc (*tmp, prot);
- X else
- X fputc ('.', prot);
- X }
- X fprintf (prot, "<--\n");
- X# endif
- X /*
- X * Should set state to DECIDE; but we don't need it.
- X * state will be reset anyway.
- X */
- X /* FALL THROUGH */
- X case DECIDE:
- X /*
- X * Can we decide what to do with the sequence which
- X * we have found? We cannot, if we are at the end of
- X * the block, because we need just one more character.
- X */
- X if (b2 >= end) {
- X# ifdef DEBUG
- X fprintf (prot, "I cannot decide. Must read a new block.\n");
- X# endif
- X (void) append (b1, b2, 0);
- X return (TRY);
- X }
- X# ifdef DEBUG
- X fprintf (prot, "I can decide.\n");
- X if (ind_c) {
- X if (*b2 == '\0' || *b2 == '\n')
- X fprintf (prot, "String is a C string; followed by %s\n",
- X *b2 == '\0'?"NUL":"\\n");
- X }
- X# endif
- X if (((int)(b2-b1)+saved >= min_str_len) &&
- X (!ind_c || (*b2 == '\0' || *b2 == '\n'))) {
- X /*
- X * String is accepted. Copy it to the output buffer.
- X */
- X# ifdef DEBUG
- X fprintf (prot, "Accept string.\n");
- X# endif
- X (void) append (b1, b2, 1);
- X } else {
- X /*
- X * String is refused. Forget any temporarily buffered
- X * stuff in output buffer.
- X */
- X# ifdef DEBUG
- X fprintf (prot, "String refused.\n");
- X# endif
- X level = out_buf + num_out_buf;
- X saved = 0;
- X }
- X state = SEARCH;
- X } /* switch */
- X } /* for (;b < end; ... */
- X return (state);
- X}
- X
- Xstrings (name)
- Xchar * name;
- X/*
- X * Find strings in a file or an input stream.
- X * name is the name of the file to scan; if it is NULL, the standard
- X * input (file descriptor 0) is scanned.
- X * This routine sets the limits to handle a file, either to the
- X * whole file, or to the initialized data only.
- X * In a loop it reads blocks from the file and calls the DFA ('examine').
- X * Examine returns its state, so that it can be reentered at the
- X * right place.
- X * Errors of open, lseek and read are reported with perror (). A file
- X * opened here is closed again on every way out of the routine.
- X */
- X{
- X    register int n, state;
- X    LSEEK_TYPE l, first, last;
- X
- X    if (name == NULL) {
- X        fd = 0;
- X    } else
- X        if ((fd = open (name, O_RDONLY, 0)) == -1) {
- X            perror (name);
- X            return;
- X        }
- X    if (name == NULL || ind_whole == 1) {
- X        first = (LSEEK_TYPE)0;
- X        last = (LSEEK_TYPE)(-1); /* --> no limit */
- X    } else {
- X# ifdef I_SPECIAL
- X        /*
- X         * Get the limits for reading.
- X         * If the file is not an object, then we look at whole file.
- X         */
- X        get_limits (fd, &first, &last);
- X# ifdef DEBUG
- X        fprintf (prot, "lseek to %ld; last = %ld\n", (long)first, (long)last);
- X# endif
- X        if (lseek (fd, first, 0) != first) {
- X            perror ("lseek");
- X            /* name is not NULL in this branch: we opened fd ourselves. */
- X            (void)close (fd);
- X            return;
- X        }
- X# else /* I_SPECIAL */
- X        first = (LSEEK_TYPE)0;
- X        last = (LSEEK_TYPE)(-1); /* --> no limit */
- X# endif /* I_SPECIAL */
- X    }
- X
- X    cur_file_name = name;
- X    offset = first;
- X    state = SEARCH;
- X    n = 0; /* the loop may be left before the first read */
- X    for (;;) {
- X        /*
- X         * Do we really have to read a block ?
- X         * How much should we read? The difficult thing here
- X         * is to watch out not to read beyond the limits of
- X         * initialized data.
- X         */
- X        if (last != (LSEEK_TYPE)(-1)) {
- X            l = last - offset;
- X            if (l <= 0)
- X                break;
- X            if (l > IN_BUF_LEN)
- X                l = IN_BUF_LEN;
- X        } else
- X            l = IN_BUF_LEN;
- X# ifdef DEBUG
- X        /* LSEEK_TYPE may be int, so cast for the %ld format */
- X        fprintf (prot, "reading %1ld chars\n", (long)l);
- X# endif
- X        if ((n = read (fd, buf, (int)l)) <= 0)
- X            break;
- X# ifdef DEBUG
- X        fprintf (prot, "read %1d characters\n", n);
- X        fflush (prot);
- X# endif
- X
- X        state = examine (state, n);
- X
- X        offset += n;
- X    }
- X    if (n == -1)
- X        perror ("read");
- X    /*
- X     * If the piece of the file ended with a string of printable characters,
- X     * we must check whether this string is valid.
- X     * We need not peek at the first character after the strings, as we know
- X     * that it cannot be \0 or \n.
- X     */
- X    if (saved > 0) {
- X        if (!ind_c && saved >= min_str_len)
- X            (void) append (buf, buf, 1);
- X        else {
- X            /*
- X             * The pending string is refused. Forget it in the same way
- X             * as examine does, so that it cannot be counted for the
- X             * next file. (flush_output is not visible from here; it may
- X             * do this as well, in which case this is harmless.)
- X             */
- X            level = out_buf + num_out_buf;
- X            saved = 0;
- X        }
- X    }
- X    /*
- X     * We must flush the output buffer.
- X     */
- X    flush_output ();
- X    if (name != 0)
- X        (void)close (fd);
- X}
- X
- X# ifndef FAST_COPY
- X/*
- X * Replacement for the copy routine of the system, used if FAST_COPY
- X * was not defined as a macro in the configuration.
- X * Copies count characters from 'from' to 'to', front to back, and
- X * returns 'to'. Nothing is copied if count is not positive.
- X */
- Xchar *
- XFAST_COPY (from, to, count)
- Xregister char * from, * to;
- Xregister int count;
- X{
- X    register char * tmp;
- X
- X    tmp = to;
- X    /* "> 0", so that a negative count cannot run wild through memory */
- X    while (count-- > 0)
- X        *to++ = *from++;
- X    return (tmp);
- X}
- X# endif /* FAST_COPY */
- END_OF_FILE
- if test 13637 -ne `wc -c <'strings.c'`; then
- echo shar: \"'strings.c'\" unpacked with wrong size!
- fi
- # end of 'strings.c'
- fi
- if test -f 'strings.h' -a "${1}" != "-c" ; then
- echo shar: Will not clobber existing file \"'strings.h'\"
- else
- echo shar: Extracting \"'strings.h'\" \(1170 characters\)
- sed "s/^X//" >'strings.h' <<'END_OF_FILE'
- X/*
- X * Main include file for 'strings'.
- X * Tests Makefile flags and takes appropriate action.
- X * Includes most include files and predefines global defines.
- X *
- X * sccsid : @(#) strings.h (v6.3 5/22/89)
- X */
- X
- X# include "config.h"
- X
- X# if defined(DEBUG)
- X# include <stdio.h>
- X# else
- X# define stderr 2
- X# define NULL 0
- X# endif DEBUG
- X
- X# ifdef FCNTL
- X# if FCNTL == 1
- X# include <fcntl.h>
- X# endif
- X# if FCNTL == 2
- X# include <sys/fcntl.h>
- X# endif
- X# else FCNTL
- X# define O_RDONLY 0
- X# endif FCNTL
- X
- X# ifdef DEBUG
- XFILE * prot;
- X# endif
- X
- X# ifdef I_SPECIAL
- X# ifdef WHAT_LSEEK
- X# if WHAT_LSEEK == 1
- X# define LSEEK_TYPE long
- X# endif
- X# if WHAT_LSEEK == 2
- X# define LSEEK_TYPE int
- X# endif
- X# endif WHAT_LSEEK
- X
- X# ifndef LSEEK_TYPE
- X# include "ERROR : WHAT_LSEEK defined wrongly"
- X# endif LSEEK_TYPE
- X
- Xextern LSEEK_TYPE lseek (/* int fd; LSEEK_TYPE offset; int whence */);
- X
- X# else I_SPECIAL
- X/*
- X * If I_SPECIAL is not defined we don't need lseek, but we need a type
- X * for offsets.
- X */
- X# define LSEEK_TYPE long
- X# endif I_SPECIAL
- X
- X# if THRESHOLD < IN_BUF_LEN
- X# include "ERROR : THRESHOLD must be >= IN_BUF_LEN"
- X# endif
- X
- Xextern char * malloc ();
- Xextern char * realloc ();
- X
- X# define CHAR_TYPE char
- END_OF_FILE
- if test 1170 -ne `wc -c <'strings.h'`; then
- echo shar: \"'strings.h'\" unpacked with wrong size!
- fi
- # end of 'strings.h'
- fi
- if test -f 'strings.txt' -a "${1}" != "-c" ; then
- echo shar: Will not clobber existing file \"'strings.txt'\"
- else
- echo shar: Extracting \"'strings.txt'\" \(5006 characters\)
- sed "s/^X//" >'strings.txt' <<'END_OF_FILE'
- XNAME
- X strings - find and output the printable strings in a file
- X
- XSYNOPSIS
- X strings [ -option ] [ filename ] ...
- X
- X option is one of a,-,o,p,t,v,e,c or a number.
- X
- XDESCRIPTION
- X "strings" looks for sequences of printable characters in a file and outputs
- X them, if they are at least "number" characters long. Default for "number" is 4.
- X If "strings" identifies a file as containing an object, it will only scan
- X the initialized data segment of the object. If you specified either the
- X "-a" or "-" flags it will scan the whole file.
- X If there is more than one filename specified, the name of the file
- X is output before the stuff from that file.
- X If there is no filename specified "strings" will read from standard input.
- X
- XOPTIONS
- X "-" or
- X "-a" examine the entire file for sequences of printable characters. Default
- X on UNIX (trademark of AT&T) systems is to examine only the segment of
- X initialized data. On systems where such a segment does not exist, the
- X options "-a" and "-" do not exist.
- X "-o" precede each output string by its offset from the beginning of the
- X file (in decimal). The number is output with (at least) 7 digits,
- X filled with blanks. Then follows a blank, then the found string.
- X If these 7 digits are not sufficient to hold the offset, "strings"
- X will use 8 digits or as many as necessary (and possible :-).
- X "number" only output sequences longer than or as long as "number".
- X Only decimal values are accepted.
- X "-p" prefix each output with the name of the input file. This is useful
- X e.g. if "strings" is used on a list of files and "fgrep" is used to
- X search for a certain string. When this string is found you will get
- X the name of the file the string was found in.
- X "-e" the next argument is interpreted as a filename, even if it starts
- X with a "-". Thus "strings -e -a" will try to read from file "-a".
- X "-t" TAB is considered a printable character.
- X "-c" output only strings which end with a NUL or LF character. Most strings
- X in C programs will result in strings like this.
- X "-v" print information about "strings" version, including the flags
- X with which it was compiled. This may be uninteresting to the normal
- X user, but is quite informative when porting to another system.
- X Nothing else is output, the program exits immediately afterwards.
- X
- X Options can be specified in a single word. They apply to all files
- X specified. It is an error for an option to be specified twice.
- X The following invocations are correct:
- X
- X strings -ao12 m1 m2 m3
- X strings -a -ot -12 m1
- X strings -a m1 -o m2
- X strings - -o
- X strings core -a blabla -o rhabarber -12
- X
- X The following invocations are incorrect:
- X
- X strings -a - BBB
- X strings -aa m
- X strings -12a14 core
- X
- XAUTHOR
- X Michael Greim
- X
- X greim@sbsvax.uucp
- X greim@sbsvax.informatik.uni-saarland.dbp.de
- X
- XDIAGNOSTICS
- X Diagnostics are always written to file descriptor 2, e.g. if a file cannot
- X be opened or malloc runs out of memory.
- X If the user makes an error when invoking "strings", a table with
- X the correct syntax will be output.
- X If the output buffer overflows, the program tries to enlarge it using
- X realloc. If this fails, it prints a message and exits
- X immediately.
- X
- XBUGS
- X "Strings" does not inform the user, whether it has found an object file
- X or not.
- X
- X If you have a file which contains a very long sequence of printable characters,
- X e.g. one about as large as your computer's memory,
- X "strings" might run out of memory. It will print "REALLOC error" and
- X exit.
- X
- X If a file contains a magic number for object files, "strings" thinks
- X it has found an object file and interprets the bytes at a certain location
- X as offsets into the file. This may lead to errors, but "strings"
- X does not tell the user about it.
- X
- X The way to identify an object file, or even the concept of initialized
- X data may vary with systems. When "strings" is compiled one can turn
- X off this special feature (I_SPECIAL). Consequently the options
- X "-a" and "-" are no longer valid. Invoke "strings" with an invalid
- X option to get a list of possible options.
- X
- X If "strings" is invoked on the standard input it fills up its buffer
- X before it outputs anything. If you enter a line at a time, it
- X takes some time before you see the first output.
- X
- X If "strings" is invoked on the standard input, it does not try to
- X recognize an object file, but acts as if the flags "-a" or "-"
- X had been set.
- X
- X The option "c" will not get all C strings. If you use something like
- X printf ("Say something\r\n");
- X and you specify "c" when invoking "strings" you will not get the string,
- X because it does not end with NUL or LF.
- END_OF_FILE
- if test 5006 -ne `wc -c <'strings.txt'`; then
- echo shar: \"'strings.txt'\" unpacked with wrong size!
- fi
- # end of 'strings.txt'
- fi
- if test -f 'test_input' -a "${1}" != "-c" ; then
- echo shar: Will not clobber existing file \"'test_input'\"
- else
- echo shar: Extracting \"'test_input'\" \(201 characters\)
- sed "s/^X//" >'test_input' <<'END_OF_FILE'
- Xxbtoa Begin
- X<+oue+DGm>@3BZ'F*&OCAfu/:EbTE(F"Rf!BPDN1Ch[cu+Cf>-FCAm$F!*%u$p6*a@3B&uDKKo;C`m
- XOH$@l)hBlbD5Bl7K)F*(i2FE8RKBln#2@3BH+DCo:<@qfX"@q]:k@:OCjEZd&\3%url
- Xxbtoa End N 116 74 E 9d S 27d9 R f6b359b1
- END_OF_FILE
- if test 201 -ne `wc -c <'test_input'`; then
- echo shar: \"'test_input'\" unpacked with wrong size!
- fi
- # end of 'test_input'
- fi
- if test -f 'tune.h' -a "${1}" != "-c" ; then
- echo shar: Will not clobber existing file \"'tune.h'\"
- else
- echo shar: Extracting \"'tune.h'\" \(2077 characters\)
- sed "s/^X//" >'tune.h' <<'END_OF_FILE'
- X/*
- X *
- X * sccsid = @(#) tune.h (v6.2 5/17/89)
- X *
- X * This file contains the flags with which you can tune strings.
- X * Define USE_USER_DEFINES in Makefile to really get them going.
- X */
- X
- X/*
- X * Look in /usr/include, /usr/include/sys, ...
- X * Where are the flags O_RDONLY, O_WRONLY, ... ?
- X * Define FCNTL as
- X * 1 : /usr/include/fcntl.h
- X * 2 : /usr/include/sys/fcntl.h
- X * If they are not defined, then omit FCNTL.
- X * strings will use old flag 0 (read only).
- X *
- X * Examples:
- X * vax BSD43, sun SUN OS : 1
- X * mx2 SINIX (System III) : 2
- X */
- X# define FCNTL 2
- X
- X/*
- X * We copy sequences of characters with a fast memory copy routine.
- X * On BSD systems this routine is called bcopy. Its usage is :
- X * bcopy (from, to, count)
- X * On SYS 5 systems this routine is called memcpy. Its usage is :
- X * memcpy (to, from, count)
- X * Decide what your system uses and define FAST_COPY accordingly.
- X * If you don't define FAST_COPY here, a routine of my own, called
- X * FAST_COPY will be used.
- X */
- X# define FAST_COPY(from,to,count) bcopy(from,to,count)
- X
- X/*
- X * Take a look at your manual. What does it say about 'lseek' ?
- X * Is it :
- X * 1 : long lseek (int fd, long dist, int whence)
- X * 2 : int lseek (int fd, int dist, int whence)
- X * If it is something else, you will have to change the source. You can
- X * ignore it of course, if int==long on your machine.
- X */
- X# define WHAT_LSEEK 1
- X
- X/*
- X * As a default strings looks at the beginning of a file. If it thinks
- X * it has found an object, it only examines the initialized data in it.
- X * If you want this behaviour define I_SPECIAL. If you always want the
- X * whole file examined, or if you are on a non-UNIX machine where the
- X * identification of object files is different, do not define I_SPECIAL.
- X */
- X# define I_SPECIAL
- X
- X/*
- X * How big should the buffers be?
- X * IN_BUF_LEN : Length of input buffer for reads.
- X * OUT_BUF_LEN : Maximal size of output buffer.
- X * THRESHOLD : Minimal size for writes. If the output buffer grows beyond
- X * THRESHOLD then it is output.
- X */
- X# define IN_BUF_LEN 1024
- X# define OUT_BUF_LEN 8192
- X# define THRESHOLD 1024
- END_OF_FILE
- if test 2077 -ne `wc -c <'tune.h'`; then
- echo shar: \"'tune.h'\" unpacked with wrong size!
- fi
- # end of 'tune.h'
- fi
- echo shar: End of archive 1 \(of 1\).
- cp /dev/null ark1isdone
- MISSING=""
- for I in 1 ; do
- if test ! -f ark${I}isdone ; then
- MISSING="${MISSING} ${I}"
- fi
- done
- if test "${MISSING}" = "" ; then
- echo You have the archive.
- rm -f ark[1-9]isdone
- else
- echo You still need to unpack the following archives:
- echo " " ${MISSING}
- fi
- ## End of shell archive.
- exit 0
-
-
-